***********************************************************************************
*** GROUP BASED TRAJECTORY MODEL: Conduct Problems ***
***********************************************************************************
*** VARIABLE GENERATION FOR TRAJECTORY MODELS ***
***********************************************************************************
***Generate a set of time variables for traj estimation (independent variable) --> use ages at corresponding sweep collection: Sweep 2(age5), Sweep 3(age10), Sweep 4(age16)***
generate age5  = 5
generate age10 = 10
generate age16 = 16

*** Missing-data bookkeeping: one helper variable per behaviour measurement.
*** Each helper is missing whenever its source score is missing.
* Rutter Scale: Conduct problems
generate age5miss1  = dCon  - 11
generate age10miss1 = mCon  - 11
generate age16miss1 = paCon - 11
*** Row-wise count of missing values across the three helper variables
egen totmiss1 = rowmiss(age5miss1 age10miss1 age16miss1)

***********************************************************************************
*** MODEL SELECTION --> First Stage
* 1. Determine the maximum number of groups
* 2. Run models: starting with two groups until maximum number of assigned groups (stepwise manner). All groups with quadratic function of time.
* 3. Finalize with the appropriate number of groups that best fits the data 
***********************************************************************************
*** Steps 1-2
***e.g., two group model:
traj if totmiss1 < 3, model(cnorm) var(dCon mCon paCon) ///
    indep(age5 age10 age16) order(2 2) min(1) max(3)

*** Step 3
*** Three-group model selected as best fit.
*** The sample is again restricted to rows with fewer than 3 missing scores.
traj if totmiss1 < 3, model(cnorm) var(dCon mCon paCon) ///
    indep(age5 age10 age16) order(0 1 0) min(1) max(3)

* Display the plot matrix stored by traj, then keep a copy (a) and its
* first nine columns (b) for a compact two-decimal listing.
matrix list e(plot1)
matrix a = e(plot1)
matrix b = a[1..., 1..9]
matrix list b, format(%5.2f)

* Plot the estimated trajectories
trajplot, xtitle("Age") ytitle("Conduct Problems")

***********************************************************************************
***Step 4. Post Model Verification: Checking for Classification Errors and Model Fit
*** user-defined functions
************************************************************************************
* traj summary stats
*
* summary_table_procTraj
*   Lists one row per trajectory group with:
*     countG     number of observations assigned to the group
*     GROUP_APP  average (maximum) posterior probability, in percent
*     OCC        odds of correct classification
*     Probab     share of observations assigned to the group, in percent
*     Prob_post  group share implied by the posterior probabilities, in percent
*   Requires _traj_Group and _traj_ProbG* in memory. The _traj_ProbG* variables
*   are assumed to be ordered to match group numbers 1, 2, ... (the k-th
*   variable in the varlist is paired with _traj_Group == k).
*   The data in memory are left unchanged (preserve/restore).
*
* Drop any earlier definition so the do-file can be re-run in the same session
* without stopping on "already defined".
capture program drop summary_table_procTraj
program define summary_table_procTraj
    preserve
    * Rows without a group assignment would distort every proportion below
    drop if missing(_traj_Group)
    * Maximum posterior probability per observation
    gen Mp = 0
    foreach i of varlist _traj_ProbG* {
        replace Mp = `i' if `i' > Mp
    }
    sort _traj_Group
    * Group size, average posterior probability, and a within-group row index
    by _traj_Group: gen countG = _N
    by _traj_Group: egen groupAPP = mean(Mp)
    by _traj_Group: gen counter = _n
    * Odds of correct classification: odds from the average posterior
    * probability divided by odds from the assigned group share
    gen n = groupAPP/(1 - groupAPP)
    gen p = countG/ _N
    gen d = p/(1-p)
    gen occ = n/d
    * Estimated proportion for each group from the posterior probabilities.
    * A local macro is used as the group counter: a bare scalar name in an
    * expression is resolved as a variable (or variable abbreviation) first,
    * and a global scalar would also overwrite a user scalar of the same name.
    local g = 0
    gen TotProb = 0
    foreach i of varlist _traj_ProbG* {
        local ++g
        quietly summarize `i'
        replace TotProb = r(sum)/ _N if _traj_Group == `g'
    }
    * Round for display and show one row per group
    gen GROUP_APP = round(groupAPP*100,.1)
    gen OCC = round(occ,.1)
    gen Probab = round(p*100,.1)
    gen Prob_post = round(TotProb*100,.1)
    list _traj_Group countG GROUP_APP OCC Probab Prob_post if counter == 1
    restore
end

summary_table_procTraj

***********************************************************************************
*** VARIABLE RENAMING/RECODING/GENERATION FROM TRAJECTORY MODEL ***
***********************************************************************************
* Copy the assigned group and swap labels 1 and 2 (group 3 keeps its label)
generate cond = _traj_Group
recode cond (1 = 2) (2 = 1)

* Binary contrast: cond 2 (coded 1) versus cond 1 (coded 0); cond 3 set to missing
generate cond12 = cond
recode cond12 (1 = 0) (2 = 1) (3 = .)

* Binary contrast: cond 3 (coded 1) versus cond 1 (coded 0); cond 2 set to missing
generate cond13 = cond
recode cond13 (1 = 0) (3 = 1) (2 = .)

